library(tidyverse)
library(broom)
library(janitor)
library(gt)
library(gtsummary)
theme_set(theme_bw())INTRODUCTION TO SPATIAL STATISTICS IN R
library(sf)
library(terra)philly_sf <- read_rds("philadelphia_tracts.rds")st_crs(philly_sf)Coordinate Reference System:
User input: EPSG:4326
wkt:
GEOGCRS["WGS 84",
ENSEMBLE["World Geodetic System 1984 ensemble",
MEMBER["World Geodetic System 1984 (Transit)"],
MEMBER["World Geodetic System 1984 (G730)"],
MEMBER["World Geodetic System 1984 (G873)"],
MEMBER["World Geodetic System 1984 (G1150)"],
MEMBER["World Geodetic System 1984 (G1674)"],
MEMBER["World Geodetic System 1984 (G1762)"],
MEMBER["World Geodetic System 1984 (G2139)"],
MEMBER["World Geodetic System 1984 (G2296)"],
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ENSEMBLEACCURACY[2.0]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
USAGE[
SCOPE["Horizontal component of 3D system."],
AREA["World."],
BBOX[-90,-180,90,180]],
ID["EPSG",4326]]
st_crs(philly_sf) <- sf::st_crs(4326)Polygon Data
pt_sf <- philly_sf
class(pt_sf)[1] "sf" "data.frame"
glimpse(philly_sf)Rows: 384
Columns: 15
$ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
$ STATEFP10 <fct> 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,…
$ COUNTYFP10 <fct> 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,…
$ TRACTCE10 <fct> 009400, 009500, 009600, 013800, 013900, 014000, 014100, 014…
$ GEOID10 <fct> 42101009400, 42101009500, 42101009600, 42101013800, 4210101…
$ NAME10 <fct> 94, 95, 96, 138, 139, 140, 141, 142, 143, 144, 145, 146, 14…
$ NAMELSAD10 <fct> Census Tract 94, Census Tract 95, Census Tract 96, Census T…
$ MTFCC10 <fct> G5020, G5020, G5020, G5020, G5020, G5020, G5020, G5020, G50…
$ FUNCSTAT10 <fct> S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S,…
$ ALAND10 <int> 366717, 319070, 405273, 341256, 562934, 439802, 562132, 789…
$ AWATER10 <int> 0, 0, 0, 0, 0, 0, 0, 277434, 282808, 0, 0, 0, 0, 0, 0, 0, 0…
$ INTPTLAT10 <fct> +39.9632709, +39.9658709, +39.9655396, +39.9764504, +39.975…
$ INTPTLON10 <fct> -075.2322437, -075.2379140, -075.2435075, -075.1771771, -07…
$ LOGRECNO <fct> 10429, 10430, 10431, 10468, 10469, 10470, 10471, 10472, 104…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-75.22927 3..., MULTIPOLYGON (…
dim(philly_sf)[1] 384 15
head(philly_sf)| OBJECTID | STATEFP10 | COUNTYFP10 | TRACTCE10 | GEOID10 | NAME10 | NAMELSAD10 | MTFCC10 | FUNCSTAT10 | ALAND10 | AWATER10 | INTPTLAT10 | INTPTLON10 | LOGRECNO | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 42 | 101 | 009400 | 42101009400 | 94 | Census Tract 94 | G5020 | S | 366717 | 0 | +39.9632709 | -075.2322437 | 10429 | MULTIPOLYGON (((-75.22927 3… |
| 1 | 2 | 42 | 101 | 009500 | 42101009500 | 95 | Census Tract 95 | G5020 | S | 319070 | 0 | +39.9658709 | -075.2379140 | 10430 | MULTIPOLYGON (((-75.23536 3… |
| 2 | 3 | 42 | 101 | 009600 | 42101009600 | 96 | Census Tract 96 | G5020 | S | 405273 | 0 | +39.9655396 | -075.2435075 | 10431 | MULTIPOLYGON (((-75.24343 3… |
| 3 | 4 | 42 | 101 | 013800 | 42101013800 | 138 | Census Tract 138 | G5020 | S | 341256 | 0 | +39.9764504 | -075.1771771 | 10468 | MULTIPOLYGON (((-75.17341 3… |
| 4 | 5 | 42 | 101 | 013900 | 42101013900 | 139 | Census Tract 139 | G5020 | S | 562934 | 0 | +39.9750563 | -075.1711846 | 10469 | MULTIPOLYGON (((-75.17313 3… |
| 5 | 6 | 42 | 101 | 014000 | 42101014000 | 140 | Census Tract 140 | G5020 | S | 439802 | 0 | +39.9735358 | -075.1630966 | 10470 | MULTIPOLYGON (((-75.16141 3… |
## extract just spatial attributes
st_geometry(philly_sf)Geometry set for 384 features
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -75.28031 ymin: 39.86747 xmax: -74.95575 ymax: 40.13793
Geodetic CRS: WGS 84
First 5 geometries:
philly_geo <- st_geometry(philly_sf)philly_geo[[1]]ggplot(philly_sf) +
geom_sf(fill = "lightblue", color = "white") +
theme_minimal() +
labs(title = "Philadelphia Census Tracts")
Line Data
bike_lanes <- st_read("Bike_Network.shp")Reading layer `Bike_Network' from data source
`C:\Users\KAsab\Desktop\SPATIAL STATISTICS\Bike_Network.shp'
using driver `ESRI Shapefile'
Simple feature collection with 5101 features and 8 fields
Geometry type: LINESTRING
Dimension: XY
Bounding box: xmin: -75.26927 ymin: 39.87651 xmax: -74.96572 ymax: 40.124
Geodetic CRS: WGS 84
ggplot(bike_lanes) +
geom_sf(color = "steelblue", size = 0.3) +
theme_minimal() +
labs(title = "Philadelphia Bike Network")
ggplot() +
geom_sf(data = philly_sf, fill = "gray95", color = "white") +
geom_sf(data = bike_lanes, color = "steelblue", size = 0.3) +
theme_minimal() +
labs(title = "Bike Lanes Overlaid on Philadelphia Census Tracts")
st_geometry(bike_lanes)Geometry set for 5101 features
Geometry type: LINESTRING
Dimension: XY
Bounding box: xmin: -75.26927 ymin: 39.87651 xmax: -74.96572 ymax: 40.124
Geodetic CRS: WGS 84
First 5 geometries:
Points Data
philly_crimes <- read_csv("philly_crimes.csv")
class(philly_crimes)[1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
philly_crimes <- philly_crimes %>%
filter(!is.na(lat), !is.na(lng)) %>%
filter(lat > 38 & lat < 41, # Philadelphia bounding box
lng < -70 & lng > -76) # longitude should be west of 0crimes2 <-read_csv("crime2.csv")
class(crimes2)[1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
crimes <- st_as_sf(crimes2,
coords = c("Lon","Lat"),
crs = 4326)library(sf)
# Convert to sf object
philly_crimes_sf <- st_as_sf(philly_crimes,
coords = c("lng","lat"),
crs = 4326) # WGS 84# st_geometry_type(philly_crimes_sf)
st_crs(philly_crimes_sf)Coordinate Reference System:
User input: EPSG:4326
wkt:
GEOGCRS["WGS 84",
ENSEMBLE["World Geodetic System 1984 ensemble",
MEMBER["World Geodetic System 1984 (Transit)"],
MEMBER["World Geodetic System 1984 (G730)"],
MEMBER["World Geodetic System 1984 (G873)"],
MEMBER["World Geodetic System 1984 (G1150)"],
MEMBER["World Geodetic System 1984 (G1674)"],
MEMBER["World Geodetic System 1984 (G1762)"],
MEMBER["World Geodetic System 1984 (G2139)"],
MEMBER["World Geodetic System 1984 (G2296)"],
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ENSEMBLEACCURACY[2.0]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
USAGE[
SCOPE["Horizontal component of 3D system."],
AREA["World."],
BBOX[-90,-180,90,180]],
ID["EPSG",4326]]
class(crimes)[1] "sf" "tbl_df" "tbl" "data.frame"
ggplot() +
geom_sf(data = philly_sf, fill = "gray95", color = "white") +
geom_sf(data = philly_crimes_sf, color = "red", alpha = 0.4, size = 0.5) +
theme_minimal() +
labs(title = "Philadelphia Crime Locations")
# st_geometry_type(philly_crimes_sf)
st_crs(philly_crimes_sf)Coordinate Reference System:
User input: EPSG:4326
wkt:
GEOGCRS["WGS 84",
ENSEMBLE["World Geodetic System 1984 ensemble",
MEMBER["World Geodetic System 1984 (Transit)"],
MEMBER["World Geodetic System 1984 (G730)"],
MEMBER["World Geodetic System 1984 (G873)"],
MEMBER["World Geodetic System 1984 (G1150)"],
MEMBER["World Geodetic System 1984 (G1674)"],
MEMBER["World Geodetic System 1984 (G1762)"],
MEMBER["World Geodetic System 1984 (G2139)"],
MEMBER["World Geodetic System 1984 (G2296)"],
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ENSEMBLEACCURACY[2.0]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
USAGE[
SCOPE["Horizontal component of 3D system."],
AREA["World."],
BBOX[-90,-180,90,180]],
ID["EPSG",4326]]
plot(st_geometry(philly_crimes_sf)) # base R check
Aggregate Crime Counts by Tract
#For each crime point, find the census tract polygon it falls within, and append the tract attributes to the crime.
joined <- st_join(philly_crimes_sf, philly_sf) # assigns each point to a tract
crime_counts <- joined %>%
group_by(GEOID10) %>% # or appropriate tract ID
summarise(n = n())
philly_sf <- left_join(
philly_sf,
st_drop_geometry(crime_counts),
by = "GEOID10"
)
ggplot(philly_sf) +
geom_sf(aes(fill = n)) +
scale_fill_viridis_c(na.value = "white") +
theme_minimal() +
labs(title = "Crime Counts per Census Tract")
Time-Series or Temporal Filtering
recent_crimes <- philly_crimes_sf %>%
filter(dispatch_date > as.Date("2025-07-01"))
ggplot() +
geom_sf(data = philly_sf, fill = "gray90") +
geom_sf(data = recent_crimes, color = "blue", alpha = 0.5, size = 0.3) +
labs(title = "Crimes Since July 2025")+
theme_bw()
ggplot() +
geom_sf(data = philly_sf, fill = "gray90", color = NA) +
stat_density_2d(
data = st_coordinates(philly_crimes_sf) %>% as.data.frame(),
aes(x = X, y = Y, fill = after_stat(level)),
geom = "polygon", alpha = 0.4
) +
scale_fill_viridis_c() +
labs(title = "Crime Density Heatmap") +
theme_minimal()+
theme(axis.title = element_blank())
philly_crimes_sf |>
distinct(text_general_code) |>
print(n = Inf)# A tibble: 30 × 1
text_general_code
<chr>
1 Robbery No Firearm
2 Aggravated Assault No Firearm
3 Aggravated Assault Firearm
4 Thefts
5 Burglary Non-Residential
6 Burglary Residential
7 Robbery Firearm
8 Theft from Vehicle
9 Motor Vehicle Theft
10 Rape
11 All Other Offenses
12 Narcotic / Drug Law Violations
13 Other Assaults
14 Disorderly Conduct
15 Arson
16 Fraud
17 Weapon Violations
18 Prostitution and Commercialized Vice
19 Other Sex Offenses (Not Commercialized)
20 Forgery and Counterfeiting
21 Vandalism/Criminal Mischief
22 Receiving Stolen Property
23 DRIVING UNDER THE INFLUENCE
24 Offenses Against Family and Children
25 Gambling Violations
26 Liquor Law Violations
27 Embezzlement
28 Public Drunkenness
29 Vagrancy/Loitering
30 Homicide - Criminal
homicide <- philly_crimes_sf |>
filter(text_general_code == "Homicide - Criminal")
fraud <- philly_crimes_sf |>
filter(text_general_code == "Fraud") ggplot()+
geom_sf(data = philly_sf, fill = "gray95", color = "white")+
geom_sf(data = homicide, color = "red")+
geom_sf(data = fraud, color = "blue")+
theme_minimal()
Rasta Dataset
library(datasets)
class(volcano)[1] "matrix" "array"
volcano <- volcanolibrary(raster)
volcano_raster <- raster(volcano)
plot(volcano_raster, main = "Volcano Raster (raster pkg)")
library(terra)
volcano_terra <- rast(volcano)
plot(volcano_terra, main = "Volcano Raster (terra pkg)")
library(raster)
library(ggplot2)
# 1. Convert matrix to raster
volcano_raster <- raster(volcano)
# 2. Convert to data frame
volcano_df <- as.data.frame(volcano_raster, xy = TRUE)
names(volcano_df)[3] <- "elevation" # Rename layer column
# 3. Plot with ggplot2
ggplot(volcano_df, aes(x = x, y = y, fill = elevation)) +
geom_raster() +
scale_fill_viridis_c() +
coord_equal() +
theme_minimal() +
labs(title = "Volcano Elevation Map", fill = "Elevation")
library(terra)
volcano_rast <- rast(volcano)
volcano_df <- as.data.frame(volcano_rast, xy = TRUE)
names(volcano_df)[3] <- "elevation"
ggplot(volcano_df, aes(x = x, y = y, fill = elevation)) +
geom_raster() +
scale_fill_viridis_c() +
coord_equal() +
theme_minimal()+
labs(title = "Volcano Elevation Map", fill = "Elevation")
library(ggplot2)
# Convert volcano matrix to data frame
volcano_df <- as.data.frame(as.table(volcano))
names(volcano_df) <- c("y", "x", "elevation")
# Create plot with raster and contour lines
ggplot(volcano_df, aes(x = x, y = y, z = elevation)) +
geom_raster(aes(fill = elevation)) +
geom_contour(color = "black", size = 0.3) +
scale_fill_viridis_c(name = "Elevation") +
labs(title = "Volcano Elevation Map with Contours") +
theme_minimal()
Spatial Hypothesis Testing
nyc <- st_read("Counties.shp")Reading layer `Counties' from data source
`C:\Users\KAsab\Desktop\SPATIAL STATISTICS\Counties.shp' using driver `ESRI Shapefile'
Simple feature collection with 62 features and 17 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 105571.4 ymin: 4480943 xmax: 779932.1 ymax: 4985476
Projected CRS: NAD83 / UTM zone 18N
nys_lyme_data <- read_rds("nys_lyme_data.rds")
lyme <- nys_lyme_dataclass(nys_lyme_data)[1] "sf" "data.frame"
ggplot()+
geom_sf(data = nyc,color = "black",fill = "gray95")+
theme_minimal()+
labs(title = 'New York City Counties')
ggplot()+
geom_sf(data = nys_lyme_data,color = "steelblue",fill = "gray95")+
theme_minimal()+
labs(title = 'New York State Lyme Disease Data')
st_geometry((nys_lyme_data))Geometry set for 62 features
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 105571.4 ymin: 4480943 xmax: 779932.1 ymax: 4985476
Projected CRS: NAD83 / UTM zone 18N
First 5 geometries:
library(tmap)
tmap_mode("view") # set mode to interactive
tm_shape(nys_lyme_data)+ # specify sf object with geographic attribute of interest
tm_polygons("Lyme.Incidence.Rate") # specify column with value of interest# St lawrence has missing values for ithe incidence rate so you remove it
nys_lyme_data <- nys_lyme_data |>
filter(!is.na(Lyme.Incidence.Rate))tmap_mode("view")
tm_shape(nys_lyme_data)+
tm_polygons("Lyme.Incidence.Rate")Global Clustering (Moran’s I)
summary(nys_lyme_data[["Lyme.Incidence.Rate"]]) Min. 1st Qu. Median Mean 3rd Qu. Max.
1.90 13.50 36.30 80.27 98.50 599.60
nys_lyme_data |>
ggplot(aes(x = Lyme.Incidence.Rate))+
geom_histogram(color = "white")
nys_lyme_data |>
ggplot(aes(x = Lyme.Incidence.Rate))+
geom_boxplot()
nys_lyme_data <- nys_lyme_data |>
mutate(log_lyme_incidence = log(Lyme.Incidence.Rate))nys_lyme_data |>
ggplot(aes(x = log_lyme_incidence))+
geom_histogram(color = "white",bins = 20)
nys_lyme_data |>
ggplot(aes(x = log_lyme_incidence))+
geom_boxplot()
tmap_mode("view")
tm_shape(nys_lyme_data)+
tm_polygons("log_lyme_incidence")Define Neighbouring Polygons
library(spdep)
## create nb object from lyme dataset
lyme_nb <- poly2nb(nys_lyme_data,queen = TRUE) # queen case
# poly2nb is for finding contiguinity neighborsclass(lyme_nb)[1] "nb"
str(lyme_nb)List of 61
$ : int [1:6] 11 20 42 45 46 47
$ : int [1:4] 5 26 50 60
$ : int [1:4] 30 31 41 59
$ : int [1:4] 9 12 13 53
$ : int [1:4] 2 7 15 60
$ : int [1:7] 12 23 34 38 49 54 58
$ : int [1:2] 5 15
$ : int [1:4] 48 50 53 54
$ : int [1:5] 4 12 13 27 39
$ : int [1:2] 16 17
$ : int [1:5] 1 14 20 42 55
$ : int [1:7] 4 6 9 27 34 53 54
$ : int [1:7] 4 9 20 39 47 52 55
$ : int [1:4] 11 36 40 55
$ : int [1:5] 5 7 19 32 60
$ : int [1:5] 10 17 21 56 57
$ : int [1:3] 10 16 21
$ : int [1:4] 21 22 29 45
$ : int [1:6] 15 26 28 32 37 60
$ : int [1:6] 1 11 13 42 47 55
$ : int [1:6] 16 17 18 22 45 56
$ : int [1:6] 18 21 25 29 33 39
$ : int [1:3] 6 25 38
$ : int [1:3] 31 41 43
$ : int [1:4] 22 23 33 38
$ : int [1:6] 2 19 28 35 50 60
$ : int [1:6] 9 12 33 34 38 39
$ : int [1:5] 19 26 35 37 58
$ : int [1:6] 18 22 39 45 46 47
$ : int [1:4] 3 41 51 59
$ : int [1:3] 3 24 41
$ : int [1:3] 15 19 37
$ : int [1:5] 22 25 27 38 39
$ : int [1:4] 6 12 27 38
$ : int [1:6] 26 28 49 50 58 61
$ : int [1:5] 14 40 44 52 55
$ : int [1:3] 19 28 32
$ : int [1:6] 6 23 25 27 33 34
$ : int [1:7] 9 13 22 27 29 33 47
$ : int [1:4] 14 36 44 59
$ : int [1:5] 3 24 30 31 43
$ : int [1:5] 1 11 20 45 57
$ : int [1:2] 24 41
$ : int [1:3] 36 40 59
$ : int [1:8] 1 18 21 29 42 46 56 57
$ : int [1:4] 1 29 45 47
$ : int [1:6] 1 13 20 29 39 46
$ : int [1:5] 8 49 50 54 61
$ : int [1:6] 6 35 48 54 58 61
$ : int [1:6] 2 8 26 35 48 61
$ : int 30
$ : int [1:3] 13 36 55
$ : int [1:4] 4 8 12 54
$ : int [1:6] 6 8 12 48 49 53
$ : int [1:6] 11 13 14 20 36 52
$ : int [1:4] 16 21 45 57
$ : int [1:4] 16 42 45 56
$ : int [1:4] 6 28 35 49
$ : int [1:4] 3 30 40 44
$ : int [1:5] 2 5 15 19 26
$ : int [1:4] 35 48 49 50
- attr(*, "class")= chr "nb"
- attr(*, "region.id")= chr [1:61] "1" "2" "3" "4" ...
- attr(*, "call")= language poly2nb(pl = nys_lyme_data, queen = TRUE)
- attr(*, "type")= chr "queen"
- attr(*, "snap")= num 0.01
- attr(*, "sym")= logi TRUE
- attr(*, "ncomp")=List of 2
..$ nc : num 1
..$ comp.id: num [1:61] 1 1 1 1 1 1 1 1 1 1 ...
# view neighbors of first polygon
lyme_nb[[1]][1] 11 20 42 45 46 47
nys_lyme_data[["NAME"]][1][1] "Albany"
nys_lyme_data[["NAME"]][c(11,20,42,45,46,47)][1] "Columbia" "Greene" "Rensselaer" "Saratoga" "Schenectady"
[6] "Schoharie"
Assigning Weights to neighbors
## calculate weights from nb object, we will specify style = "W" for equal weights
lyme_weights <- nb2listw(lyme_nb, style = "W")
class(lyme_weights)[1] "listw" "nb"
str(lyme_weights, max.level = 1)List of 3
$ style : chr "W"
$ neighbours:List of 61
..- attr(*, "class")= chr "nb"
..- attr(*, "region.id")= chr [1:61] "1" "2" "3" "4" ...
..- attr(*, "call")= language poly2nb(pl = nys_lyme_data, queen = TRUE)
..- attr(*, "type")= chr "queen"
..- attr(*, "snap")= num 0.01
..- attr(*, "sym")= logi TRUE
..- attr(*, "ncomp")=List of 2
$ weights :List of 61
..- attr(*, "mode")= chr "binary"
..- attr(*, "W")= logi TRUE
..- attr(*, "comp")=List of 1
- attr(*, "class")= chr [1:2] "listw" "nb"
- attr(*, "region.id")= chr [1:61] "1" "2" "3" "4" ...
- attr(*, "call")= language nb2listw(neighbours = lyme_nb, style = "W")
- attr(*, "zero.policy")= logi FALSE
## The weights of the neighbors for the first polygon (Albany)
## Albany has 6 neighbors
lyme_weights[["weights"]][1][[1]]
[1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
lyme[["NAME"]][2][1] "Allegany"
lyme_nb[[2]][1] 5 26 50 60
lyme_weights[["weights"]][2][[1]]
[1] 0.25 0.25 0.25 0.25
Perform Hypothesis Testing
## calculate Moran's I statistic and perform hypothesis testing using "moran.test" (analytical calculation) and "moran.mc" (via montecarlo simulation)
## These functions require that we specify the variable of interest and the list of neighbor weights for each polygon
## The option alternative = "greater" specifies testing foe positive spatial autocorrelation , which is alo the default for these functions
## The "moran.mc" function also requires that we specify the number of simulations with option "nsim"## Analytical test - quicker computation but sensitive to irregularly distributed polygons
moran.test(nys_lyme_data[["log_lyme_incidence"]],lyme_weights,alternative = "greater")
Moran I test under randomisation
data: nys_lyme_data[["log_lyme_incidence"]]
weights: lyme_weights
Moran I statistic standard deviate = 8.7379, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic Expectation Variance
0.720555645 -0.016666667 0.007118479
moran.test(nys_lyme_data[["log_lyme_incidence"]],lyme_weights,alternative = "greater") |>
tidy()| estimate1 | estimate2 | estimate3 | statistic | p.value | method | alternative |
|---|---|---|---|---|---|---|
| 0.7205556 | -0.0166667 | 0.0071185 | 8.737856 | 0 | Moran I test under randomisation | greater |
## Monte carlo simulation is slower but the preferred method for an accurate p-value
MC <- moran.mc(nys_lyme_data[["log_lyme_incidence"]],lyme_weights,nsim = 999,alternative = "greater")
MC
Monte-Carlo simulation of Moran I
data: nys_lyme_data[["log_lyme_incidence"]]
weights: lyme_weights
number of simulations + 1: 1000
statistic = 0.72056, observed rank = 1000, p-value = 0.001
alternative hypothesis: greater
log_incidence_rates <- nys_lyme_data |> pull(log_lyme_incidence)
mc_result <- moran.mc(log_incidence_rates,lyme_weights,nsim = 999,alternative = "greater")
mc_result
Monte-Carlo simulation of Moran I
data: log_incidence_rates
weights: lyme_weights
number of simulations + 1: 1000
statistic = 0.72056, observed rank = 1000, p-value = 0.001
alternative hypothesis: greater
names(mc_result)[1] "statistic" "parameter" "p.value" "alternative" "method"
[6] "data.name" "res"
library(ggplot2)
# Convert to data frame for ggplot
mc_df <- data.frame(simulated_I = mc_result$res)
ggplot(mc_df, aes(x = simulated_I)) +
geom_histogram(fill = "gray80", color = "white", bins = 30) +
geom_vline(xintercept = mc_result$statistic, color = "red", size = 1.2) +
labs(title = "Monte Carlo Simulation of Moran's I",
x = "Simulated Moran's I",
y = "Frequency",
subtitle = paste("Observed Moran's I =", round(mc_result$statistic, 4))) +
theme_minimal()
ggplot(mc_df, aes(x = simulated_I)) +
geom_density(fill = "gray80", color = "white") +
geom_vline(xintercept = mc_result$statistic, color = "red", size = 1.2) +
labs(title = "Monte Carlo Simulation of Moran's I",
x = "Simulated Moran's I",
y = "Frequency",
subtitle = paste("Observed Moran's I =", round(mc_result$statistic, 4))) +
theme_minimal()
Assessing Local Trends
library(rgeoda)
library(spdep)
## find queen_case contiguous weights
lyme_gweights <- queen_weights(nys_lyme_data)
class(lyme_gweights)[1] "Weight"
attr(,"package")
[1] "rgeoda"
str(lyme_gweights)Reference class 'Weight' [package "rgeoda"] with 9 fields
$ gda_w :Formal class 'p_GeoDaWeight' [package "rgeoda"] with 1 slot
.. ..@ pointer:<externalptr>
$ is_symmetric : logi TRUE
$ sparsity : num 0.0763
$ min_neighbors : int 1
$ max_neighbors : int 8
$ num_obs : int 61
$ mean_neighbors : num 4.66
$ median_neighbors: num 5
$ has_isolates : logi FALSE
and 27 methods, of which 13 are possibly relevant:
GetNeighbors, GetNeighborSize, GetNeighborWeights, GetPointer, GetSparsity,
HasIsolates, initialize, IsSymmetric, SaveToFile, SetNeighbors,
SetNeighborsAndWeights, SpatialLag, Update
## see the neighbors of the first polygon (Albany)
get_neighbors(lyme_gweights,1)[1] 11 20 42 45 46 47
nys_lyme_data[["NAME"]][1][1] "Albany"
nys_lyme_data[["NAME"]][get_neighbors(lyme_gweights,1)][1] "Columbia" "Greene" "Rensselaer" "Saratoga" "Schenectady"
[6] "Schoharie"
## get neighbor weights of first and second polygons
get_neighbors_weights(lyme_gweights,1)[1] 1 1 1 1 1 1
get_neighbors_weights(lyme_gweights,2)[1] 1 1 1 1
Calculate Local Moran I statistic
## local_moran function requires a one-column data frame
log_incidence_df <- as.data.frame(nys_lyme_data[["log_lyme_incidence"]])
lyme_lisa <- local_moran(lyme_gweights,log_incidence_df)
## local_moran returns a LISA object
class(lyme_lisa)[1] "LISA"
attr(,"package")
[1] "rgeoda"
## There will be a Moran's I statistic and associated pseudo p_value at each polygon
names(lyme_lisa) [1] "colors" ".->p_vals"
[3] ".->labels" "p_vals"
[5] "GetLocalSignificanceValues" "labels"
[7] ".self" ".->nn_vals"
[9] ".->lisa_vals" ".refClassDef"
[11] "lisa_vals" ".->gda_lisa"
[13] "initialize" "gda_lisa"
[15] "nn_vals" ".->colors"
[17] ".->c_vals" "c_vals"
lyme_lisa[["lisa_vals"]] ## View local Moran's I values for each polygon [1] 6.303589e-01 1.106106e+00 1.414135e+00 4.771674e-01 1.205222e+00
[6] 6.955438e-02 2.297059e+00 3.348814e-01 3.241632e-01 -3.410545e-02
[11] 2.786406e+00 -1.137256e-02 7.590118e-01 1.810965e+00 2.370781e+00
[16] 1.539095e-01 -7.265369e-02 -3.917429e-02 1.341750e+00 2.403365e+00
[21] -5.327627e-02 4.522845e-03 -2.021608e-02 6.452192e-01 -3.709835e-05
[26] 1.369921e+00 -5.424938e-02 8.254362e-01 -1.846812e-03 9.200084e-01
[31] 6.151358e-01 2.915899e+00 -6.473286e-02 1.193171e-01 2.607967e-01
[36] 1.225206e+00 1.797554e+00 -4.994189e-02 2.284720e-01 1.108447e+00
[41] 1.366207e+00 2.002465e+00 4.258473e-01 4.793295e-01 3.220550e-01
[46] 1.858658e-01 5.146906e-01 2.360824e-01 4.121139e-03 -3.754591e-02
[51] -1.523086e-01 9.750675e-01 5.005027e-01 1.171338e-01 1.861588e+00
[56] 3.084596e-01 8.629906e-01 4.824627e-01 1.943183e-02 1.642308e+00
[61] -2.581274e-03
lyme_lisa[["p_vals"]] ## View pseudo p-values [1] 0.001 0.019 0.046 0.129 0.001 0.425 0.048 0.097 0.156 0.338 0.001 0.222
[13] 0.002 0.001 0.001 0.309 0.389 0.399 0.001 0.001 0.220 0.438 0.335 0.102
[25] 0.496 0.024 0.306 0.007 0.148 0.059 0.006 0.010 0.315 0.284 0.085 0.003
[37] 0.010 0.217 0.261 0.085 0.014 0.001 0.047 0.065 0.099 0.169 0.022 0.242
[49] 0.482 0.248 0.157 0.020 0.121 0.350 0.002 0.105 0.018 0.096 0.335 0.002
[61] 0.493
map_colors <- lisa_colors(lyme_lisa)
map_labels <- lisa_labels(lyme_lisa)
map_clusters <- lisa_clusters(lyme_lisa)# Extract cluster IDs from the LISA result
cluster_id <- lisa_clusters(lyme_lisa)
# Use only the labels and colors for the present cluster IDs (0, 1, 2)
labels_all <- lisa_labels(lyme_lisa)
colors_all <- lisa_colors(lyme_lisa)
nys_lyme_data$cluster_label <- labels_all[as.integer(cluster_id) + 1]
nys_lyme_data$cluster_color <- colors_all[as.integer(cluster_id) + 1]table(nys_lyme_data$cluster_label)
High-High Low-Low Not significant
11 14 36
table(nys_lyme_data$cluster_color)
#0000FF #eeeeee #FF0000
14 36 11
ggplot(nys_lyme_data) +
geom_sf(aes(fill = cluster_label), color = "white", size = 0.1) +
scale_fill_manual(
values = setNames(nys_lyme_data$cluster_color, nys_lyme_data$cluster_label),
name = "LISA Cluster"
) +
labs(title = "Local Moran's I Cluster Map (Lyme Disease in NYS)") +
theme_minimal()
library(ggplot2)
ggplot(nys_lyme_data) +
geom_sf(aes(fill = cluster_label), color = "white", size = 0.2) +
scale_fill_manual(
values = setNames(unique(nys_lyme_data$cluster_color), unique(nys_lyme_data$cluster_label)),
name = "LISA Cluster"
) +
theme_minimal() +
labs(title = "Local Moran's I Cluster Map of Lyme Disease in NY State")
library(tmap)
# Ensure your spatial data is in sf format and has cluster_color and cluster_label
nys_lyme_data$cluster_label <- factor(nys_lyme_data$cluster_label)
# Set tmap to interactive mode
tmap_mode("view")
# Create interactive map
tm_shape(nys_lyme_data) +
tm_polygons(
col = "cluster_label",
palette = c("High-High" = "#FF0000", "Low-Low" = "#0000FF", "Not significant" = "#eeeeee"),
title = "LISA Cluster",
id = "NAME", # tooltip: county name
popup.vars = c("Cluster" = "cluster_label", "Cases" = "log_lyme_incidence")
) +
tm_layout(title = "Local Moran's I Lyme Clusters (NY)", legend.outside = TRUE)library(sf)
# Reproject to WGS84 (EPSG:4326)
nys_lyme_data_wgs84 <- st_transform(nys_lyme_data, crs = 4326)library(leaflet)
# Define color palette function
pal <- colorFactor(
palette = c("High-High" = "#FF0000",
"Low-Low" = "#0000FF",
"Not significant" = "#eeeeee"),
domain = nys_lyme_data_wgs84$cluster_label
)
# Create leaflet map
leaflet(nys_lyme_data_wgs84) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
fillColor = ~pal(cluster_label),
fillOpacity = 0.7,
weight = 1,
color = "#555",
label = ~paste(NAME, "<br>", cluster_label),
highlightOptions = highlightOptions(
weight = 2,
color = "#000",
bringToFront = TRUE
)
) %>%
addLegend("bottomright", pal = pal, values = ~cluster_label,
title = "LISA Cluster")library(leaflet)
leaflet() %>%
addTiles() %>% # Adds OpenStreetMap tile
addMarkers(lng = -74.006, lat = 40.7128, popup = "New York City")map_colors <- lisa_colors(lyme_lisa)
map_labels <- lisa_labels(lyme_lisa)
map_clusters <- lisa_clusters(lyme_lisa)lisa_clusters(lyme_lisa) [1] 1 2 2 0 2 0 2 0 0 0 1 0 1 1 2 0 0 0 2 1 0 0 0 0 0 2 0 2 0 0 2 2 0 0 0 1 2 0
[39] 0 0 2 1 2 0 0 0 1 0 0 0 0 1 0 0 1 0 1 0 0 2 0
# High-High = 1
# Low-Low = 2
# Not significant = 0